#coding:utf-8
#万里学院本地书香列表页
from bs4 import BeautifulSoup
import requests
import os
import pymysql as mysql
import time
import math
import configparser

# Load the shared file-storage root from config.ini once at import time.
config = configparser.ConfigParser()
with open("config.ini", "r") as cfgfile:
	# read_file() replaces readfp(), which was deprecated in Python 3.2
	# and removed in Python 3.12.
	config.read_file(cfgfile)
	ROOT = config.get("fileroot", "root")


# Outbound HTTP proxy for the crawler's requests.
# NOTE(review): requests usually expects a scheme ("http://host:port");
# a bare "host:port" is tolerated but ambiguous — confirm intended form.
proxies = {
	"http": "192.168.0.72:8119",
}
# Browser-like headers so the intranet server returns the normal HTML page.
hdrs = {
	'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.108 Safari/537.36',
	'Referer': 'http://10.60.154.30:8089/sortBook.action',
	'Origin': 'http://10.60.154.30:8089',
}


		
def insert_sql(sqlList):
	"""Execute every SQL statement in sqlList on one MySQL connection.

	All statements run in a single transaction and are committed only if
	every one succeeds.  The connection is always closed, even when a
	statement raises (the original leaked the connection on error).

	:param sqlList: iterable of complete SQL statement strings
	"""
	conn = mysql.connect(
		host='192.168.0.91', port=3306, user='root',
		passwd='vipdatacenter', db='wanli', charset='utf8mb4',
	)
	try:
		# pymysql cursors are context managers; the cursor is closed on exit.
		with conn.cursor() as cur:
			for sql in sqlList:
				cur.execute(sql)
		conn.commit()
	finally:
		conn.close()

if __name__ == "__main__":
	# Ensure the local dump directory exists (kept for parity with sibling
	# scripts that store raw pages under <ROOT>/20180423/list).
	root = os.path.join(ROOT, "20180423", "list")
	if not os.path.exists(root):
		os.makedirs(root)
	# One session for the whole crawl: reuses the HTTP connection instead
	# of rebuilding it for each of the 2061 pages.
	sn = requests.Session()
	url = "http://10.60.154.30:8089/sortBook.action"
	for page in range(2061):
		sqlList = []
		print(page)
		data = {
			'currentPage': str(page + 1),
			'currentCode': '',
		}
		try:
			r = sn.post(url, headers=hdrs, data=data, proxies=proxies)
		except requests.RequestException:
			# Network failure on this page: skip it rather than abort the
			# crawl (narrowed from a bare except that hid real bugs).
			continue
		soup = BeautifulSoup(r.content, 'lxml')
		ul = soup.find("ul", class_="rec_book_resource")
		if ul is None:
			# Unexpected page layout (e.g. an error page): nothing to harvest.
			continue
		for li in ul.find_all("li"):
			dt = li.find("dt")
			ta = dt.find("a") if dt is not None else None
			if ta is None:
				continue
			href = ta.get("href") or ""
			# Expected href form: "...action?id=<article id>".
			parts = href.split("=")
			if len(parts) < 2:
				continue
			article_id = parts[-1]
			# NOTE(review): article_id comes from scraped HTML and is
			# spliced into SQL by string formatting — move insert_sql to
			# parameterized queries if ids can ever contain quotes.
			sqlList.append(
				"insert ignore into article (id) values ('%s')" % (article_id,)
			)
		insert_sql(sqlList)